Kernel Author:
Bhishan Poudel, Ph.D Contd. Astrophysics

Date    : Jan 31, 2020
Topic    : ngals5k z1.5

Introduction

Update

  1. Looked at gm0 vs gc0 (and gm1 vs gc1) 45 degree line and removed outliers.
  2. Find the weights for g_sq for given magnitude bins using smooth fitting curve.

Usual Filtering

df = df.query('calib_psfCandidate == 0.0')
df = df.query('deblend_nChild == 0.0')
df['ellip'] = np.hypot( df['ext_shapeHSM_HsmShapeRegauss_e1'] ,
                        df['ext_shapeHSM_HsmShapeRegauss_e2'] )
df = df.query('ellip < 2.0') # it was 1.5 before

#select only few columns after filtering:
cols_select = ['base_SdssCentroid_x', 'base_SdssCentroid_y',
                'base_SdssCentroid_xSigma','base_SdssCentroid_ySigma',
                'ext_shapeHSM_HsmShapeRegauss_e1','ext_shapeHSM_HsmShapeRegauss_e2',
                'base_SdssShape_flux']
df = df[cols_select]        

# drop all nans
df = df.dropna()

# additional columns
df['radius'] =  df.eval(""" ( (ext_shapeHSM_HsmSourceMoments_xx *  ext_shapeHSM_HsmSourceMoments_yy) \
                                          -  (ext_shapeHSM_HsmSourceMoments_xy**2 ) )**0.25 """)

Shape filtering
https://github.com/LSSTDESC/DC2-analysis/blob/master/tutorials/object_gcr_2_lensing_cuts.ipynb

df = df.query('ext_shapeHSM_HsmShapeRegauss_resolution >= 0.3')
df = df.query('ext_shapeHSM_HsmShapeRegauss_sigma <= 0.4')
df = df.query('ext_shapeHSM_HsmShapeRegauss_flag== 0.0')

Filter strongly lensed objects

  • Take the objects with centroids >154 pixels (remove strong lens objects).
    # exclude strong lens objects <=154 distance
    # The shape of lsst.fits file is 3998,3998 and center is 1699,1699.
    df['x_center'] = 1699
    df['y_center'] = 1699
    df['distance'] = ( (df['x[0]'] - df['x_center'])**2 + (df['x[1]'] - df['y_center'])**2 )**0.5
    df = df[df.distance > 154]
    

Imcat script

# create new columns and cleaning (four files)
lc -C -n fN -n id -N '1 2 x' -N '1 2 errx' -N '1 2 g' -n ellip -n flux -n radius < "${M9T}".txt  |  lc +all 'mag = %flux log10 -2.5 *'  |  cleancat 15  |  lc +all -r 'mag' > "${M9C}".cat


# merge 4 catalogs
mergecats 5 "${MC}".cat "${M9C}".cat "${LC}".cat "${L9C}".cat > ${catalogs}/merge.cat &&


lc -b +all 
'x = %x[0][0] %x[1][0] + %x[2][0] + %x[3][0] + 4 / %x[0][1] %x[1][1] + %x[2][1] + %x[3][1] + 4 / 2 vector'
'gm = %g[0][0] %g[1][0] + 2 / %g[0][1] %g[1][1] + 2 / 2 vector' 
'gc = %g[2][0] %g[3][0] + 2 / %g[2][1] %g[3][1] + 2 / 2 vector'   
'gmd = %g[0][0] %g[1][0] - 2 / %g[0][1] %g[1][1] - 2 / 2 vector' 
'gcd = %g[2][0] %g[3][0] - 2 / %g[2][1] %g[3][1] - 2 / 2 vector' 
< ${catalogs}/merge.cat > ${final}/final_${i}.cat

Notes

final_text.txt is created by imcat program after merging four lsst files (m,m9,l,l9) after cleaning.

Imports

In [1]:
import json, os,sys
import numpy as np
import pandas as pd
import seaborn as sns
import time
sns.set(color_codes=True)

import plotly
import ipywidgets

pd.set_option('display.max_columns',200)
time_start_notebook = time.time()

import matplotlib.pyplot as plt
plt.style.use('ggplot')
%matplotlib inline

print([(x.__name__, x.__version__) for x in [np,pd,sns,plotly,ipywidgets]])
/Users/poudel/Library/Enthought/Canopy/edm/envs/deeplr/lib/python3.6/site-packages/statsmodels/tools/_testing.py:19: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.
  import pandas.util.testing as tm
[('numpy', '1.17.4'), ('pandas', '1.0.0rc0'), ('seaborn', '0.9.0'), ('plotly', '4.1.1'), ('ipywidgets', '6.0.0')]
In [2]:
%%javascript
IPython.OutputArea.auto_scroll_threshold = 9999;

Load the final text cleancat15 data

g_sq = g00 g00 + g10 g10
gmd_sq = gmd0**2 + gmd1**2
In [3]:
!head -2 ../data/cleancat/final_text_cleancat15_ngals5k_z1.5_000_059.txt
#       fN[0][0]       fN[1][0]       fN[2][0]       fN[3][0]       id[0][0]       id[1][0]       id[2][0]       id[3][0]           x[0]           x[1]     errx[0][0]     errx[0][1]     errx[1][0]     errx[1][1]     errx[2][0]     errx[2][1]     errx[3][0]     errx[3][1]        g[0][0]        g[0][1]        g[1][0]        g[1][1]        g[2][0]        g[2][1]        g[3][0]        g[3][1]    ellip[0][0]    ellip[1][0]    ellip[2][0]    ellip[3][0]     flux[0][0]     flux[1][0]     flux[2][0]     flux[3][0]   radius[0][0]   radius[1][0]   radius[2][0]   radius[3][0]      mag[0][0]      mag[1][0]      mag[2][0]      mag[3][0]          gm[0]          gm[1]          gc[0]          gc[1]         gmd[0]         gmd[1]         gcd[0]         gcd[1]
               0              0              0              0            673            683            677            693      647.97127      1502.7331         0.1035         0.1735         0.1048         0.1684         0.1034         0.1743         0.1048         0.1685          -0.89         0.3539         -0.844         0.5361        -0.8916         0.3642        -0.8501         0.5359      0.9577814      0.9998696     0.96311588      1.0049173      6065.0547      6034.4473      6061.3064       6030.099      4.3237747      4.2545658       4.322709      4.2532389     -9.4570868     -9.4515937     -9.4564156     -9.4508111         -0.867          0.445       -0.87085        0.45005         -0.023        -0.0911       -0.02075       -0.08585
In [5]:
names = "fN[0][0]       fN[1][0]       fN[2][0]       fN[3][0]       id[0][0]       id[1][0]       id[2][0]       id[3][0]           x[0]           x[1]     errx[0][0]     errx[0][1]     errx[1][0]     errx[1][1]     errx[2][0]     errx[2][1]     errx[3][0]     errx[3][1]        g[0][0]        g[0][1]        g[1][0]        g[1][1]        g[2][0]        g[2][1]        g[3][0]        g[3][1]    ellip[0][0]    ellip[1][0]    ellip[2][0]    ellip[3][0]     flux[0][0]     flux[1][0]     flux[2][0]     flux[3][0]   radius[0][0]   radius[1][0]   radius[2][0]   radius[3][0]      mag[0][0]      mag[1][0]      mag[2][0]      mag[3][0]          gm[0]          gm[1]          gc[0]          gc[1]         gmd[0]         gmd[1]         gcd[0]         gcd[1]"
print(names)
fN[0][0]       fN[1][0]       fN[2][0]       fN[3][0]       id[0][0]       id[1][0]       id[2][0]       id[3][0]           x[0]           x[1]     errx[0][0]     errx[0][1]     errx[1][0]     errx[1][1]     errx[2][0]     errx[2][1]     errx[3][0]     errx[3][1]        g[0][0]        g[0][1]        g[1][0]        g[1][1]        g[2][0]        g[2][1]        g[3][0]        g[3][1]    ellip[0][0]    ellip[1][0]    ellip[2][0]    ellip[3][0]     flux[0][0]     flux[1][0]     flux[2][0]     flux[3][0]   radius[0][0]   radius[1][0]   radius[2][0]   radius[3][0]      mag[0][0]      mag[1][0]      mag[2][0]      mag[3][0]          gm[0]          gm[1]          gc[0]          gc[1]         gmd[0]         gmd[1]         gcd[0]         gcd[1]
In [4]:
names = ['fN[0][0]','fN[1][0]','fN[2][0]','fN[3][0]',
 'id[0][0]','id[1][0]','id[2][0]','id[3][0]',
 'x[0]','x[1]',
 'errx[0][0]','errx[0][1]','errx[1][0]','errx[1][1]','errx[2][0]',
 'errx[2][1]','errx[3][0]','errx[3][1]',
 'g[0][0]','g[0][1]','g[1][0]','g[1][1]','g[2][0]','g[2][1]','g[3][0]','g[3][1]',
 'ellip[0][0]','ellip[1][0]','ellip[2][0]','ellip[3][0]',
 'flux[0][0]','flux[1][0]','flux[2][0]','flux[3][0]',
 'radius[0][0]','radius[1][0]','radius[2][0]','radius[3][0]',
 'mag[0][0]','mag[1][0]','mag[2][0]','mag[3][0]',
 'gm[0]','gm[1]','gc[0]', 'gc[1]',
 'gmd[0]','gmd[1]','gcd[0]','gcd[1]']


file_path = '../data/cleancat/final_text_cleancat15_ngals5k_z1.5_000_059.txt'


df = pd.read_csv(file_path,comment='#',engine='python',sep=r'\s\s+',
                 header=None,names=names)

print(df.shape)

# new columns
# df['g_sq'] = df['g[0][0]'] **2 + df['g[1][0]']**2 # only for imcat 00 and 10
# df['gmd_sq'] = df['gmd[0]'] **2 + df['gmd[1]']**2

df['g_sq'] = df['g[0][0]'] **2 + df['g[0][1]']**2
df['gmd_sq'] = df['gmd[0]'] **2 + df['gmd[1]']**2

df['gm_sq'] = df['gm[0]']**2 + df['gm[1]']**2
df['gc_sq'] = df['gc[0]']**2 + df['gc[1]']**2

df['mag_mono'] = (df['mag[0][0]'] + df['mag[1][0]'] ) / 2
df['mag_chro'] = (df['mag[2][0]'] + df['mag[3][0]'] ) / 2

df.head()
(16297, 50)
Out[4]:
fN[0][0] fN[1][0] fN[2][0] fN[3][0] id[0][0] id[1][0] id[2][0] id[3][0] x[0] x[1] errx[0][0] errx[0][1] errx[1][0] errx[1][1] errx[2][0] errx[2][1] errx[3][0] errx[3][1] g[0][0] g[0][1] g[1][0] g[1][1] g[2][0] g[2][1] g[3][0] g[3][1] ellip[0][0] ellip[1][0] ellip[2][0] ellip[3][0] flux[0][0] flux[1][0] flux[2][0] flux[3][0] radius[0][0] radius[1][0] radius[2][0] radius[3][0] mag[0][0] mag[1][0] mag[2][0] mag[3][0] gm[0] gm[1] gc[0] gc[1] gmd[0] gmd[1] gcd[0] gcd[1] g_sq gmd_sq gm_sq gc_sq mag_mono mag_chro
0 0 0 0 0 673 683 677 693 647.97127 1502.7331 0.1035 0.1735 0.1048 0.1684 0.1034 0.1743 0.1048 0.1685 -0.8900 0.3539 -0.8440 0.5361 -0.8916 0.3642 -0.8501 0.5359 0.957781 0.999870 0.963116 1.004917 6065.0547 6034.4473 6061.3064 6030.0990 4.323775 4.254566 4.322709 4.253239 -9.457087 -9.451594 -9.456416 -9.450811 -0.86700 0.44500 -0.87085 0.45005 -0.02300 -0.09110 -0.02075 -0.08585 0.917345 0.008828 0.949714 0.960925 -9.454340 -9.453613
1 0 0 0 0 3499 3499 3491 3495 2094.70640 2363.5999 0.1814 0.1514 0.1712 0.1469 0.1818 0.1517 0.1715 0.1470 0.4744 0.7372 0.4634 0.8546 0.4766 0.7466 0.4695 0.8588 0.876652 0.972153 0.885753 0.978758 5224.5019 5129.3051 5216.0866 5122.0497 4.365375 4.175324 4.361294 4.172532 -9.295112 -9.275146 -9.293362 -9.273610 0.46890 0.79590 0.47305 0.80270 0.00550 -0.05870 0.00355 -0.05610 0.768519 0.003476 0.853324 0.868104 -9.285129 -9.283486
2 0 0 0 0 3839 3837 3834 3840 3125.41390 2716.2394 0.0058 0.0093 0.0084 0.0059 0.0058 0.0092 0.0084 0.0059 -1.0315 -0.4696 1.1092 0.3567 -1.0306 -0.4490 1.0890 0.3394 1.133365 1.165144 1.124161 1.140664 105794.9600 105897.8200 105815.4500 105925.2100 4.143002 4.102367 4.143918 4.103612 -12.561162 -12.562218 -12.561373 -12.562498 0.03885 -0.05645 0.02920 -0.05480 -1.07035 -0.41315 -1.05980 -0.39420 1.284516 1.316342 0.004696 0.003856 -12.561690 -12.561936
3 0 0 0 0 3448 3427 3418 3423 2875.39700 2270.5155 0.5592 0.5932 0.3278 0.4274 0.5733 0.6035 0.3305 0.4297 0.0053 -0.2876 -0.3449 0.0699 0.0002 -0.2808 -0.3434 0.0523 0.287649 0.351912 0.280800 0.347360 2454.1737 2466.8531 2452.0065 2459.7026 4.507311 4.501422 4.544499 4.530116 -8.474763 -8.480358 -8.473804 -8.477206 -0.16980 -0.10885 -0.17160 -0.11425 0.17510 -0.17875 0.17180 -0.16655 0.082742 0.062612 0.040680 0.042500 -8.477561 -8.475505
4 0 0 0 0 3758 3762 3737 3733 2294.16330 2602.7944 0.0066 0.0103 0.0106 0.0074 0.0066 0.0103 0.0106 0.0074 -1.0435 0.3069 0.9961 -0.4738 -1.0598 0.3144 0.9957 -0.4814 1.087695 1.103042 1.105452 1.105968 89198.4270 90320.9920 89218.3080 90366.8230 4.058412 4.231359 4.058714 4.233707 -12.375893 -12.389472 -12.376135 -12.390023 -0.02370 -0.08345 -0.03205 -0.08350 -1.01980 0.39035 -1.02775 0.39790 1.183080 1.192365 0.007526 0.007999 -12.382682 -12.383079

Plot g-squared for monochromatic and chromatic files

In [7]:
fig,ax = plt.subplots(1,2,figsize=(14,6))

df['gm_sq'].plot.hist(bins=50,ax=ax[0],color='b',label='mono')
ax[0].set_xlabel('gm_sq')
ax[0].legend()
ax[0].set_title('g-squared histogram for mono')

# gcsq
df['gc_sq'].plot.hist(bins=50,ax=ax[1],color='r',label='chro')
ax[1].set_xlabel('gc_sq')
ax[1].legend()
ax[1].set_title('g-squared histogram for chro')

# savefig
plt.savefig('images/gmsq_gcsq_hist.png')
In [8]:
plt.figure(figsize=(12,8))
sns.distplot(df['g_sq'],label='g_sq')
sns.distplot(df['gmd_sq'],label='gmd_sq')

plt.legend()
Out[8]:
<matplotlib.legend.Legend at 0x1257b3198>

Contour Plots for Diagonal cuts

In [9]:
import plotly
import plotly.graph_objs as go
import plotly.figure_factory as ff
import plotly.tools as tls
from plotly.offline import plot, iplot, init_notebook_mode
init_notebook_mode(connected=False)
In [10]:
def matrix_of_number_density_from_two_cols(df,xcol,ycol,N):
    """Create grid of number density of two columns
    
    - Find the absolute max from two columns.
    - Create N bins -absMax to +absMax.
    - Return a matrix of N*N shape.
    """
    from itertools import product
    
    # derived variables
    xlabel = xcol
    ylabel = ycol
    xlabel1 = xlabel + '_cat_labels'
    ylabel1 = ylabel + '_cat_labels'
    
    xlabel2 = xlabel + '_cat'
    ylabel2 = ylabel + '_cat'
    colname = 'cat_freq'
    
    # take only xlabel and ylabel columns
    dx = df[[xlabel, ylabel]].copy()
    
    # get absolute maximum from two columns
    tolerance = 0.0000001
    max_abs_xcol_ycol = df[[xcol,ycol]].describe().iloc[[3,-1],:].abs().max().max()
    
    # create 1d array with N+1 values to create N bins
#     bins = np.linspace(-max_abs_xcol_ycol-tolerance, max_abs_xcol_ycol+tolerance,N+1)
    bins = np.linspace(0, max_abs_xcol_ycol,N+1)

    # create N bins
    dx[xlabel1] = pd.cut(dx[xlabel], bins, labels=np.arange(N))
    dx[ylabel1] = pd.cut(dx[ylabel], bins, labels=np.arange(N))

    # count number of points in each bin
    dx[colname] = dx.groupby([xlabel1,ylabel1])[xlabel1].transform('count')

    # drop duplicates
    dx1 = dx.drop_duplicates(subset=[xlabel1,ylabel1])[[xlabel1,ylabel1,colname]]

    # use permutation to get the grid of N * N
    perms = list(product(range(N), range(N)))
    x = [i[0] for i in perms]
    y = [i[1] for i in perms]
    dx2 = pd.DataFrame({xlabel1: x, ylabel1: y, colname:0})

    # update dx2 to merge frequency values
    dx2.update(dx2.drop(colname,1).merge(dx1,how='left'))
    dx2 = dx2.astype(int)

    # z values to plot heatmap
    z = dx2[colname].values.reshape(N,N)
    z = z.T

    return z

Transform and scale the data

In [96]:
def transform_scale(z,transform='linear',scale=None):
    """Transform and scale given 1d numpy array.
    
    transform: linear, log, sqrt, sinh, arcsinh
    scale    : minmax, zscale
    
    """
    #==================================
    # linear, log, sqrt, arcsinh, sinh 
    #
    # we need linear tranform option to compare.
    if transform == 'linear':
        z = z

    if transform == 'log':
        z = np.log1p(z)
        
    if transform == 'sqrt':
        z = np.sqrt(z)

    if transform == 'sinh':
        z = np.sinh(z)
        
    if transform == 'arcsinh':
        z = np.arcsinh(z)
    
    #===============================
    # scaling minmax and zscale
    if scale== 'minmax':
        z = z / (z.max()-z.min())
    if scale == 'zscale':
        z = (z-z.mean()) / z.std()
    if scale is None:
        z = z
        
    return z

plot the countours

In [103]:
def plot_contour(Z,colorscale,x1y1x2y2=None,
                 title='Contour plot',
                 xlabel='',
                 ylabel=''):
    """Plot the contour plot.

    Contour plot from given 2d array.
    
    Example:
    -----------
    z  = matrix_of_number_density_from_two_cols(df,'gsq','gmdsq',N)
    z1 = transform_scale(z,transform=transform,scale=scale)

    plot_contour(z1, colorscale='Viridis', x1y1x2y2=[10,0,99,90],
            title=f'Contour plot: {transform}+{scale}',
            xlabel='Bin number of gsq',
            ylabel='Bin number of gmsq')
    
    """

    trace1 = go.Contour(z=Z, colorscale=colorscale)
    axis_layout = dict(
        showticklabels = True
    )
    
    xaxis = {**axis_layout, **{'title':f'{xlabel}'}}
    yaxis = {**axis_layout, **{'title':f'{ylabel}'}}

    layout = go.Layout(
        width=1000,
        height=1000,
        autosize=False,
        title=f'{title} ',
        xaxis = xaxis,
        yaxis = yaxis,
    )
    

    data = [trace1]
    
    if x1y1x2y2:

        # center line 
        p2x1, p2y1 = 0,0
        p2x2, p2y2 = 99,99
        sc1 = go.Scatter(x=[p2x1,p2x2],
                         y=[p2y1,p2y2],
                         mode = 'lines+markers',
                         name = f'line ({p2x1},{p2y1}) to ({p2x2},{p2y2})')
        
        sc2 = go.Scatter(x=[x1y1x2y2[0],x1y1x2y2[2]],
                         y=[x1y1x2y2[1],x1y1x2y2[3]],
                         mode = 'lines+markers',
                         name = f'line ({x1y1x2y2[0]},{x1y1x2y2[1]}) \
                         to ({x1y1x2y2[2]},{x1y1x2y2[3]})')


        data = [trace1, sc1, sc2]

    fig = go.Figure( data=data, layout=layout )
    
    # update figure
    fig.update_layout(
    xaxis = dict(tickmode = 'linear',dtick = 5),
    yaxis = dict(tickmode = 'linear',dtick = 5),
    )

    iplot(fig)
    return None

N = 100
transform='log' # linear log sqrt sinh
scale='minmax'


xcol,ycol = 'g_sq', 'gmd_sq'
z  = matrix_of_number_density_from_two_cols(df,xcol,ycol,N)
z1 = transform_scale(z,transform=transform,scale=scale)

plot_contour(z1, colorscale='Viridis', x1y1x2y2=[10,0,99,90],
            title=f'Contour plot for transform =  {transform} and scaling = {scale}',
            xlabel=f'Bin number of {xcol}',
            ylabel=f'Bin number of {ycol}')

Grid of N*N from g_sq and gmd_sq

In [13]:
N = 100
xcol = 'g_sq'
ycol = 'gmd_sq'
max_abs_xcol_ycol = df[[xcol,ycol]].abs().max().max()

print(max_abs_xcol_ycol)

df[df[xcol]==max_abs_xcol_ycol]
3.99436938
Out[13]:
fN[0][0] fN[1][0] fN[2][0] fN[3][0] id[0][0] id[1][0] id[2][0] id[3][0] x[0] x[1] errx[0][0] errx[0][1] errx[1][0] errx[1][1] errx[2][0] errx[2][1] errx[3][0] errx[3][1] g[0][0] g[0][1] g[1][0] g[1][1] g[2][0] g[2][1] g[3][0] g[3][1] ellip[0][0] ellip[1][0] ellip[2][0] ellip[3][0] flux[0][0] flux[1][0] flux[2][0] flux[3][0] radius[0][0] radius[1][0] radius[2][0] radius[3][0] mag[0][0] mag[1][0] mag[2][0] mag[3][0] gm[0] gm[1] gc[0] gc[1] gmd[0] gmd[1] gcd[0] gcd[1] g_sq gmd_sq gm_sq gc_sq mag_mono mag_chro
21605 38 38 38 38 6451 6450 6448 6407 2519.428 2550.8574 0.0455 0.0406 0.0467 0.0424 0.0459 0.0408 0.0473 0.0426 1.9293 0.5217 1.5932 -0.1021 1.719 0.451 1.4994 -0.1034 1.998592 1.596468 1.777178 1.502961 12487.845 12648.607 12818.484 12914.576 4.594356 4.671837 4.688169 4.746876 -10.241219 -10.255107 -10.269592 -10.2777 1.76125 0.2098 1.6092 0.1738 0.16805 0.3119 0.1098 0.2772 3.994369 0.125522 3.146018 2.619731 -10.248163 -10.273646
In [14]:
# create 1d array with N+1 values to create N bins
bins = np.linspace(0, max_abs_xcol_ycol,N+1)

# bins dict
bins_dict = {i:v for i,v in enumerate(bins)}

# create N bins
ser_ycol_bins = pd.cut(df[ycol], bins=bins,)

df['g_sq_bins'] = ser_ycol_bins
df[['g_sq','g_sq_bins']].head()
Out[14]:
g_sq g_sq_bins
0 0.215290 (0.16, 0.2]
1 0.926680 (0.0, 0.0399]
2 1.270152 (1.198, 1.238]
3 1.261045 (0.0, 0.0399]
4 0.069621 (0.0399, 0.0799]
In [15]:
# bin 0  ==> 0          to 0.03994369
# bin 1  ==> 0.03994369 to 0.07988739
# bin 10 ==> 0.39943694 to 0.43938063

bins[10], bins[11]
Out[15]:
(0.399436938, 0.4393806318)

Analysis of Points above gmd_sq bin number 10

What is the corresponding gmsq value to y-axis bin number 10 (11th bin)?

The 100*100 bin is created from absMax of g_sq and gmd_sq. Then the line 0 to absMax is divided into 100 parts and bins are created.

bin 10 for gmd_sq is 0.39943694 to 0.43938063

In [16]:
# take all points where gmd_sq > 10th bin

df_gmd_sq_lt_bin10 = df[df.gmd_sq > bins[10]]

print(df_gmd_sq_lt_bin10.shape)

df_gmd_sq_lt_bin10.head()
(28372, 57)
Out[16]:
fN[0][0] fN[1][0] fN[2][0] fN[3][0] id[0][0] id[1][0] id[2][0] id[3][0] x[0] x[1] errx[0][0] errx[0][1] errx[1][0] errx[1][1] errx[2][0] errx[2][1] errx[3][0] errx[3][1] g[0][0] g[0][1] g[1][0] g[1][1] g[2][0] g[2][1] g[3][0] g[3][1] ellip[0][0] ellip[1][0] ellip[2][0] ellip[3][0] flux[0][0] flux[1][0] flux[2][0] flux[3][0] radius[0][0] radius[1][0] radius[2][0] radius[3][0] mag[0][0] mag[1][0] mag[2][0] mag[3][0] gm[0] gm[1] gc[0] gc[1] gmd[0] gmd[1] gcd[0] gcd[1] g_sq gmd_sq gm_sq gc_sq mag_mono mag_chro g_sq_bins
2 0 0 0 0 1301 1310 1323 1312 2652.56650 1772.3448 0.2510 0.1715 0.1663 0.3002 0.2522 0.1715 0.1665 0.3017 0.9614 0.5881 -0.9979 -0.4635 1.0062 0.6076 -1.0206 -0.4729 1.127010 1.100289 1.175422 1.124837 3694.2411 3674.4453 3684.164 3663.4596 4.161950 4.303319 4.159870 4.301257 -8.918813 -8.912980 -8.915847 -8.909728 -0.01825 0.06230 -0.00720 0.06735 0.97965 0.52580 1.01340 0.54025 1.270152 1.236180 0.004214 0.004588 -8.915896 -8.912788 (1.198, 1.238]
5 0 0 0 0 5592 1389 5393 1394 1668.12470 1882.8265 0.0453 0.0393 0.0394 0.0218 0.0457 0.0395 0.0393 0.0218 0.2646 0.9514 0.8981 -0.2964 0.2685 0.9593 0.9048 -0.3017 0.987510 0.945747 0.996167 0.953775 49474.3730 45594.8360 49496.897 45655.9450 5.496548 5.503342 5.499099 5.507981 -11.735951 -11.647289 -11.736445 -11.648743 0.58135 0.32750 0.58665 0.32880 -0.31675 0.62390 -0.31815 0.63050 0.975175 0.489582 0.445224 0.452268 -11.691620 -11.692594 (0.479, 0.519]
8 0 0 0 0 5895 5967 5826 5909 1448.97510 2334.9766 0.0188 0.0227 0.0239 0.0182 0.0189 0.0229 0.0241 0.0183 -0.2900 0.9268 0.6441 -0.7562 -0.3066 0.9498 0.6584 -0.7697 0.971112 0.993329 0.998060 1.012881 44277.4560 44510.9310 44328.676 44646.1230 4.390932 4.486290 4.404279 4.505218 -11.615457 -11.621167 -11.616712 -11.624459 0.17705 0.08530 0.17590 0.09005 -0.46705 0.84150 -0.48250 0.85975 0.943058 0.926258 0.038623 0.039050 -11.618312 -11.620586 (0.919, 0.959]
16 0 0 0 0 1916 1935 1936 1936 987.00513 2672.3523 0.0075 0.0098 0.0099 0.0079 0.0075 0.0098 0.0100 0.0079 -0.4763 0.8956 0.6350 -0.8553 -0.5089 0.9281 0.6544 -0.8827 1.014377 1.065253 1.058465 1.098817 91854.4360 93143.4830 91915.122 93209.8860 4.097640 4.188533 4.106857 4.199608 -12.407750 -12.422881 -12.408467 -12.423655 0.07935 0.02015 0.07275 0.02270 -0.55565 0.87545 -0.58165 0.90540 1.028961 1.075160 0.006702 0.005808 -12.415316 -12.416061 (1.039, 1.078]
17 0 0 0 0 4798 1156 4737 1155 1993.05820 1545.4589 0.0838 0.0597 0.0636 0.1048 0.0842 0.0598 0.0637 0.1052 1.0009 -0.3474 -0.7955 0.6838 1.0567 -0.3781 -0.8061 0.6933 1.059475 1.049001 1.122308 1.063232 10896.1520 10999.3280 10882.732 10990.3080 4.145457 4.541074 4.145158 4.539750 -10.093183 -10.103415 -10.091845 -10.102525 0.10270 0.16820 0.12530 0.15760 0.89820 -0.51560 0.93140 -0.53570 1.122488 1.072607 0.038839 0.040538 -10.098299 -10.097185 (1.039, 1.078]
In [17]:
# fN[0][0]  ==> lsst_mono_z1.5_000.fits
# fN[1][0]  ==> lsst_mono90_z1.5_000.fits
#
# id[0][0]  ==> id of given object for mono file number 0
In [18]:
# take only first file number 0 (it has m,m9,c and c9)

df_gmd_sq_lt_bin10_file0 = df_gmd_sq_lt_bin10[df_gmd_sq_lt_bin10['fN[0][0]'] == 0]

# add radius for mono
df_gmd_sq_lt_bin10_file0['radius_mono'] = \
(df_gmd_sq_lt_bin10_file0['radius[0][0]'] + 
 df_gmd_sq_lt_bin10_file0['radius[1][0]'] ) /2.0

print(df_gmd_sq_lt_bin10_file0.shape)

df_gmd_sq_lt_bin10_file0.head()
(120, 58)
Out[18]:
fN[0][0] fN[1][0] fN[2][0] fN[3][0] id[0][0] id[1][0] id[2][0] id[3][0] x[0] x[1] errx[0][0] errx[0][1] errx[1][0] errx[1][1] errx[2][0] errx[2][1] errx[3][0] errx[3][1] g[0][0] g[0][1] g[1][0] g[1][1] g[2][0] g[2][1] g[3][0] g[3][1] ellip[0][0] ellip[1][0] ellip[2][0] ellip[3][0] flux[0][0] flux[1][0] flux[2][0] flux[3][0] radius[0][0] radius[1][0] radius[2][0] radius[3][0] mag[0][0] mag[1][0] mag[2][0] mag[3][0] gm[0] gm[1] gc[0] gc[1] gmd[0] gmd[1] gcd[0] gcd[1] g_sq gmd_sq gm_sq gc_sq mag_mono mag_chro g_sq_bins radius_mono
2 0 0 0 0 1301 1310 1323 1312 2652.56650 1772.3448 0.2510 0.1715 0.1663 0.3002 0.2522 0.1715 0.1665 0.3017 0.9614 0.5881 -0.9979 -0.4635 1.0062 0.6076 -1.0206 -0.4729 1.127010 1.100289 1.175422 1.124837 3694.2411 3674.4453 3684.164 3663.4596 4.161950 4.303319 4.159870 4.301257 -8.918813 -8.912980 -8.915847 -8.909728 -0.01825 0.06230 -0.00720 0.06735 0.97965 0.52580 1.01340 0.54025 1.270152 1.236180 0.004214 0.004588 -8.915896 -8.912788 (1.198, 1.238] 4.232634
5 0 0 0 0 5592 1389 5393 1394 1668.12470 1882.8265 0.0453 0.0393 0.0394 0.0218 0.0457 0.0395 0.0393 0.0218 0.2646 0.9514 0.8981 -0.2964 0.2685 0.9593 0.9048 -0.3017 0.987510 0.945747 0.996167 0.953775 49474.3730 45594.8360 49496.897 45655.9450 5.496548 5.503342 5.499099 5.507981 -11.735951 -11.647289 -11.736445 -11.648743 0.58135 0.32750 0.58665 0.32880 -0.31675 0.62390 -0.31815 0.63050 0.975175 0.489582 0.445224 0.452268 -11.691620 -11.692594 (0.479, 0.519] 5.499945
8 0 0 0 0 5895 5967 5826 5909 1448.97510 2334.9766 0.0188 0.0227 0.0239 0.0182 0.0189 0.0229 0.0241 0.0183 -0.2900 0.9268 0.6441 -0.7562 -0.3066 0.9498 0.6584 -0.7697 0.971112 0.993329 0.998060 1.012881 44277.4560 44510.9310 44328.676 44646.1230 4.390932 4.486290 4.404279 4.505218 -11.615457 -11.621167 -11.616712 -11.624459 0.17705 0.08530 0.17590 0.09005 -0.46705 0.84150 -0.48250 0.85975 0.943058 0.926258 0.038623 0.039050 -11.618312 -11.620586 (0.919, 0.959] 4.438611
16 0 0 0 0 1916 1935 1936 1936 987.00513 2672.3523 0.0075 0.0098 0.0099 0.0079 0.0075 0.0098 0.0100 0.0079 -0.4763 0.8956 0.6350 -0.8553 -0.5089 0.9281 0.6544 -0.8827 1.014377 1.065253 1.058465 1.098817 91854.4360 93143.4830 91915.122 93209.8860 4.097640 4.188533 4.106857 4.199608 -12.407750 -12.422881 -12.408467 -12.423655 0.07935 0.02015 0.07275 0.02270 -0.55565 0.87545 -0.58165 0.90540 1.028961 1.075160 0.006702 0.005808 -12.415316 -12.416061 (1.039, 1.078] 4.143086
17 0 0 0 0 4798 1156 4737 1155 1993.05820 1545.4589 0.0838 0.0597 0.0636 0.1048 0.0842 0.0598 0.0637 0.1052 1.0009 -0.3474 -0.7955 0.6838 1.0567 -0.3781 -0.8061 0.6933 1.059475 1.049001 1.122308 1.063232 10896.1520 10999.3280 10882.732 10990.3080 4.145457 4.541074 4.145158 4.539750 -10.093183 -10.103415 -10.091845 -10.102525 0.10270 0.16820 0.12530 0.15760 0.89820 -0.51560 0.93140 -0.53570 1.122488 1.072607 0.038839 0.040538 -10.098299 -10.097185 (1.039, 1.078] 4.343265

Regions above and below for gsq vs gmdsq contour plot

For example, take points:
Lower line below the diagonal line
point on x-axis: x1,y1 = 10,0
point on y-axis: x2,y2 = 99,90

here 20,0,99,80 are bin number, their values are obtained from bins_dict
x1,y1 = bins_dict[10], bins_dict[0]
x2,y2 = bins_dict[99], bins_dict[90]


Equation of straight line:
y-y1 = y2-y1 * (x-x1)
       -----
       x2-x1

boundary: (x2-x1) * (y-y1) - (y2-y1) * (x-x1)
In [19]:
N = 100
bins = np.linspace(0, max_abs_xcol_ycol,N+1)
bins_dict = {i:v for i,v in enumerate(bins)}

# points from contour plot
x1y1x2y2 = [10,0,99,90]
print(x1y1x2y2)

# actual values of x1, y1, x2, and y2
x1,y1 = bins_dict[x1y1x2y2[0]], bins_dict[x1y1x2y2[1]]
x2,y2 = bins_dict[x1y1x2y2[2]], bins_dict[x1y1x2y2[3]]

df['above'] = df.eval(" ( (@x2-@x1) * gmd_sq )  >= ( (@y2-@y1) * (g_sq - @x1 )) ")

print(df['above'].value_counts())
print()
print(df['above'].value_counts(normalize=True))
[10, 0, 99, 90]
True     62302
False    51668
Name: above, dtype: int64

True     0.546653
False    0.453347
Name: above, dtype: float64
In [20]:
df[df.above==True]['gm_sq'].plot.hist(figsize=(12,8),bins=60,color='b',label='Above')

nabove = len(df[df.above==True])
nbelow = len(df[df.above==False])

above_pct = nabove/(nabove+nbelow)*100
below_pct = 100-above_pct
text = f'Above = {nabove:,} ({above_pct:,.0f}%)\nBelow = {nbelow:,} ( {below_pct:,.0f}%)'

plt.text(0.5,10_000,text,fontsize=14)
plt.legend()

plt.xlabel('gm_sq')
plt.title('gm_sq histograms for above and below cases', fontsize=20)

df[df.above==False]['gm_sq'].plot.hist(figsize=(12,8),bins=60,color='r',alpha=0.5,label='Below')
plt.legend()
plt.xlabel('gm_sq')
plt.title('gm_sq histograms below the boundary', fontsize=20)
Out[20]:
Text(0.5, 1.0, 'gm_sq histograms below the boundary')
In [21]:
df[df.above==True]['gm_sq'].plot.hist(figsize=(12,8),bins=60,color='r',alpha=0.5)
plt.xlabel('gm_sq')
plt.title('gm_sq histogram', fontsize=20)
Out[21]:
Text(0.5, 1.0, 'gm_sq histogram')
In [22]:
df[df.above==False]['gm_sq'].plot.hist(figsize=(12,8),bins=60,color='r',alpha=0.5,label='Below')
plt.legend()
plt.xlabel('gm_sq')
plt.title('gm_sq histograms below the boundary', fontsize=20)
Out[22]:
Text(0.5, 1.0, 'gm_sq histograms below the boundary')

Now, Take only the data above the lower diagonal as cleaned data

In [23]:
rows_before = df.shape[0]
df = df[df.above==True]

print(f'Before rows: {rows_before}, After rows: {df.shape[0]}')
Before rows: 113970, After rows: 62302
In [24]:
plt.figure(figsize=(12,8))
sns.distplot(df['gm_sq'],label='gm_sq')
plt.legend()
plt.savefig('images/gmsq_histogram_after_cleaning.png',dpi=300)

gm vs gc Plots

Equation of straight line:
y-y1 = y2-y1 * (x-x1)
       -----
       x2-x1

boundary: (x2-x1) * (y-y1) - (y2-y1) * (x-x1)
In [25]:
fig,ax = plt.subplots(1,2,figsize=(16,8))
df.plot.scatter(x='gm[0]',y='gc[0]', ax=ax[0])
df.plot.scatter(x='gm[1]',y='gc[1]', ax=ax[1])

ax[0].set_xlim(-2.0,2.0)
ax[0].set_ylim(-2.0,2.0)

ax[1].set_xlim(-2.0,2.0)
ax[1].set_ylim(-2.0,2.0)

# 45 degree line
n=2.0
ax[0].plot([-n,n],[-n,n],'r--')
ax[1].plot([-n,n],[-n,n],'r--')

plt.suptitle('gm vs gc plot',weight='bold',fontsize=24);

Remove outliers based on gm vs gc plot

In [26]:
# Two scatter plots
#===========================================
fig,ax = plt.subplots(1,2,figsize=(16,8))
plt.suptitle('gm vs gc plot',weight='bold',fontsize=24);

df.plot.scatter(x='gm[0]',y='gc[0]', ax=ax[0])
df.plot.scatter(x='gm[1]',y='gc[1]', ax=ax[1])

ax[0].set_xlim(-2.0,2.0)
ax[0].set_ylim(-2.0,2.0)

ax[1].set_xlim(-2.0,2.0)
ax[1].set_ylim(-2.0,2.0)

# 45 degree line
n=2.0
ax[0].plot([-n,n],[-n,n],'r--')
ax[1].plot([-n,n],[-n,n],'r--')



# Left plot gm0 vs gc0 parallel lines
#===========================================
x = df['gm[0]'].to_numpy()
y = df['gc[0]'].to_numpy()

# Find distance for parallel lines
sigma = (x - y).std()
n = 10
d = n * sigma
c = d/np.sqrt(2) 
y1 = x + c
y2 = x - c

# left plot parallel lines
ax[0].plot(y1,x,'b-.',label=f'{n} sigma')
ax[0].plot(y2,x,'b-.')


# Right plot gm1 vs gc1 parallel lines
#===========================================
x = df['gm[1]'].to_numpy()
y = df['gc[1]'].to_numpy()

sigma = (x - y).std()
n = 10
d = n * sigma
c = d/np.sqrt(2) 
y1 = x + c
y2 = x - c

ax[1].plot(y1,x,'b-.', label=f'{n} sigma')
ax[1].plot(y2,x,'b-.')


#=============
# Add legends
# plt.tight_layout()
ax[0].legend(loc='upper left')
ax[1].legend(loc='upper left')

plt.show()
In [27]:
# Remove outliers for gc0 vs gm0
n = 10
sigma = (df['gm[0]'] - df['gc[0]']).std()
d = n * sigma
c = d/np.sqrt(2)

cond_upper = (df['gc[0]'] - df['gm[0]'] <= c)
cond_below = (df['gm[0]'] - df['gc[0]'] <= c)

df = df[ cond_upper & cond_below ]


# df.plot.scatter(x='gm[0]',y='gc[0]')
In [28]:
# Remove outliers for gc1 vs gm1
n = 10
sigma = (df['gm[1]'] - df['gc[1]']).std()
d = n * sigma
c = d/np.sqrt(2)

cond_upper = (df['gc[1]'] - df['gm[1]'] <= c)
cond_below = (df['gm[1]'] - df['gc[1]'] <= c)

df = df[ cond_upper & cond_below ]


# df.plot.scatter(x='gm[1]',y='gc[1]')
In [29]:
# Two scatter plots
#===========================================
fig,ax = plt.subplots(1,2,figsize=(16,8))
plt.suptitle('gm vs gc plot',weight='bold',fontsize=24);

df.plot.scatter(x='gm[0]',y='gc[0]', ax=ax[0])
df.plot.scatter(x='gm[1]',y='gc[1]', ax=ax[1])

ax[0].set_xlim(-2.0,2.0)
ax[0].set_ylim(-2.0,2.0)

ax[1].set_xlim(-2.0,2.0)
ax[1].set_ylim(-2.0,2.0)

# 45 degree line
n=2.0
ax[0].plot([-n,n],[-n,n],'r--')
ax[1].plot([-n,n],[-n,n],'r--')



# Left plot gm0 vs gc0 parallel lines
#===========================================
x = df['gm[0]'].to_numpy()
y = df['gc[0]'].to_numpy()

# Find distance for parallel lines
sigma = (x - y).std()
n = 10
d = n * sigma
c = d/np.sqrt(2) 
y1 = x + c
y2 = x - c

# left plot parallel lines
ax[0].plot(y1,x,'b-.',label=f'{n} sigma')
ax[0].plot(y2,x,'b-.')




# Right plot gm1 vs gc1 parallel lines
#===========================================
x = df['gm[1]'].to_numpy()
y = df['gc[1]'].to_numpy()

sigma = (x - y).std()
n = 10
d = n * sigma
c = d/np.sqrt(2) 
y1 = x + c
y2 = x - c

ax[1].plot(y1,x,'b-.', label=f'{n} sigma')
ax[1].plot(y2,x,'b-.')


#=============
# Add legends
# plt.tight_layout()
ax[0].legend(loc='upper left')
ax[1].legend(loc='upper left')

plt.show()

Plot magnidude for different bin numbers

In [30]:
df.filter(regex='mag').head(2)
Out[30]:
mag[0][0] mag[1][0] mag[2][0] mag[3][0] mag_mono mag_chro
0 -12.255571 -12.294254 -12.261842 -12.309714 -12.274912 -12.285778
2 -8.918813 -8.912980 -8.915847 -8.909728 -8.915896 -8.912788
In [31]:
df[['gm_sq','gc_sq']].describe()
Out[31]:
gm_sq gc_sq
count 6.197200e+04 6.197200e+04
mean 6.748437e-02 6.807082e-02
std 1.247800e-01 1.264310e-01
min 3.650000e-07 2.225000e-07
25% 4.631481e-03 4.639044e-03
50% 1.396151e-02 1.407223e-02
75% 5.952628e-02 5.961002e-02
max 1.866975e+00 2.005953e+00
In [32]:
def plot_bin_mag_mono_chro(nbins,show=False,ret=False):
    import os
    
    if not os.path.isdir('images'):
        os.makedirs('images')
    
    df['bins_mag_mono'] = pd.cut(df['mag_mono'],nbins)
    df['bins_mag_chro'] = pd.cut(df['mag_chro'],nbins)
    
    # show the text of bin counts
    text_mono = df.groupby('bins_mag_mono')['gm_sq'].agg(['count', 'mean'])
    text_mono = text_mono.reset_index().assign(pct_change = lambda x: x['mean'].pct_change())
    text_mono = text_mono.to_string().replace('mean','gm_sq')
    
    text_chro = df.groupby('bins_mag_chro')['gc_sq'].agg(['count', 'mean'])
    text_chro = text_chro.reset_index().assign(pct_change = lambda x: x['mean'].pct_change())
    text_chro = text_chro.to_string().replace('mean','gc_sq')
 
    # data to plot
    df_mono_gm_sq_mean = df.groupby('bins_mag_mono')['gm_sq'].mean()
    df_chro_gm_sq_mean = df.groupby('bins_mag_chro')['gc_sq'].mean()
    
    # plot
    fig,ax = plt.subplots(1,2,figsize=(12,8))
    
    # mono (plot the magnitude mean in each bins)
    df_mono_gm_sq_mean.plot(marker='o',ax=ax[0])
    ax[0].tick_params(axis='x', rotation=90)
    ax[0].set_ylabel('gm_sq_mean',fontsize=18)
    ax[0].set_xlabel('bin_mag_mono',fontsize=18)
    ax[0].set_title(f'gm_sq per magnitude bins with nbins = {nbins}')
    ax[0].text(0,0.5,text_mono,fontsize=14,va='center')
    ax[0].set_ylim(0,1)
    ax[0].set_yticks(np.arange(0, 1, step=0.1))
 
    # chro
    df_chro_gm_sq_mean.plot(marker='o',ax=ax[1])
    ax[1].tick_params(axis='x', rotation=90)
    ax[1].set_ylabel('gc_sq_mean',fontsize=18)
    ax[1].set_xlabel('bin_mag_chro',fontsize=18)
    ax[1].set_title(f'gc_sq per magnitude bins with nbins = {nbins}')
    ax[1].text(0,0.5,text_chro,fontsize=14,va='center')
    ax[1].set_ylim(0,1)
    ax[1].set_yticks(np.arange(0, 1, step=0.1))
    
    plt.tight_layout()
    plt.savefig(f'images/bin_mag_mono_chro_{nbins}.png')
    
    if show:
        plt.show()
    plt.close()
    
    if ret:
        return df_mono_gm_sq_mean, df_chro_gm_sq_mean

for nbins in range(5,15):
    plot_bin_mag_mono_chro(nbins,show=True)
In [33]:
"""
Note:
These plots were much different previously without doing some filterings:

https://nbviewer.jupyter.org/github/bpRsh/2019_shear_analysis_after_dmstack/blob/master/Jan_2020/a01_jan8/a01_cleancat15_gc0_gm0.ipynb

""";

Magnitude weight column for Monochromatic case

In [34]:
df['mag_mono'].plot.hist(bins=100)
Out[34]:
<matplotlib.axes._subplots.AxesSubplot at 0x12b20e978>
In [35]:
nbins = 9 # the bin number looks good in above gm_sq plots
df_mono, df_chro = plot_bin_mag_mono_chro(nbins,show=True,ret=True)
In [36]:
# from plot above I can see from Kth point graph goes up.
# graph is flat upto Kth point then increases linearly to top of curve.

num_start_increasing = 3 
df_mono.index[num_start_increasing]
Out[36]:
Interval(-12.524, -11.775, closed='right')
In [94]:
# how to choose the bottom of the slope mathematically?
# try to see pct change

df_mono.pct_change().to_frame('gm_sq_pct_change').style.background_gradient(low=0,high=0.9)
/Users/poudel/Library/Enthought/Canopy/edm/envs/deeplr/lib/python3.6/site-packages/matplotlib/colors.py:527: RuntimeWarning:

invalid value encountered in less

Out[94]:
gm_sq_pct_change
bins_mag_mono
(-14.778, -14.022] nan
(-14.022, -13.273] -0.178013
(-13.273, -12.524] -0.129403
(-12.524, -11.775] 0.070247
(-11.775, -11.026] 0.855582
(-11.026, -10.277] 0.439657
(-10.277, -9.528] 0.210295
(-9.528, -8.779] -0.441154
(-8.779, -8.03] -0.205357
In [38]:
# look at smaller part than
df_mono_left = df_mono.pct_change()\
    .loc[df_mono.pct_change().index < df_mono.pct_change().idxmax()]

df_mono_left
Out[38]:
bins_mag_mono
(-14.778, -14.022]         NaN
(-14.022, -13.273]   -0.178013
(-13.273, -12.524]   -0.129403
(-12.524, -11.775]    0.070247
Name: gm_sq, dtype: float64
In [44]:
df_mono.idxmax().left, df_mono.idxmax().right
Out[44]:
(-10.277, -9.528)
In [45]:
# look at case when nbinsK = 9 and when the curve is going up
# index for when curve goes linearly up after initial flat
idx_bottom_of_slope_mono = [df_mono.index[num_start_increasing].left,
                       df_mono.index[num_start_increasing].right
                       ]

mag_low_nbinsK_mono = np.mean(idx_bottom_of_slope_mono)

# index for peak of curve
idx_peak_mono = [df_mono.idxmax().left,
            df_mono.idxmax().right
           ]
mag_high_nbinsK_mono = np.mean(idx_peak_mono)

idx_bottom_of_slope_mono, idx_peak_mono, mag_low_nbinsK_mono, mag_high_nbinsK_mono
Out[45]:
([-12.524, -11.775], [-10.277, -9.528], -12.1495, -9.9025)
In [46]:
from scipy.optimize import curve_fit

xcol = 'mag_mono'
ycol = 'gm_sq'

x = df.query("""  @mag_low_nbinsK_mono < mag_mono <  @mag_high_nbinsK_mono  """)[xcol].to_numpy()
y = df.query("""  @mag_low_nbinsK_mono < mag_mono <  @mag_high_nbinsK_mono  """)[ycol].to_numpy()

def func(x, a, b):
    return a*x + b

params, _ = curve_fit(func, x, y)
[a, b] = params.round(2)

print(f'magnitude ranges for mono: {mag_low_nbinsK_mono}, {mag_high_nbinsK_mono}')
print(f'fitting params   for mono: {a}, {b}' )
magnitude ranges for mono: -12.1495, -9.9025
fitting params   for mono: 0.04, 0.51
In [47]:
mag_low_nbinsK_mono
Out[47]:
-12.1495
In [48]:
def magnitude_weight_mono(mag):
    if mag < mag_low_nbinsK_mono:
        return 1/ (mag_low_nbinsK_mono *a + b)
    
    else:
        return 1/ (a*mag + b)

df['wt_mag_mono'] = df['mag_mono'].apply(magnitude_weight_mono)
df['wt_mag_mono'] = df['wt_mag_mono'] / df['wt_mag_mono'].mean() # normalize by mean

Magnitude weight column for Chromatic case

In [49]:
df['mag_chro'].plot.hist(bins=100)
Out[49]:
<matplotlib.axes._subplots.AxesSubplot at 0x1288ece48>
In [50]:
nbins = 9 # the bin number looks good in above gm_sq plots
df_mono, df_chro = plot_bin_mag_mono_chro(nbins,show=True,ret=True)
In [51]:
# from plot above I can see from Kth point graph goes up.
# graph is flat upto Kth point then increases linearly to top of curve.

num_start_increasing = 3 
df_chro.index[num_start_increasing]
Out[51]:
Interval(-12.546, -11.804, closed='right')
In [52]:
# look at case when nbinsK = 9 and when the curve is going up
# index for when curve goes linearly up after initial flat
idx_bottom_of_slope_chro = [df_chro.index[num_start_increasing].left,
                            df_chro.index[num_start_increasing].right
                            ]

mag_low_nbinsK_chro = np.mean(idx_bottom_of_slope_chro)

# index for peak of curve
idx_peak_chro = [df_chro.idxmax().left,
                 df_chro.idxmax().right
                 ]
mag_high_nbinsK_chro = np.mean(idx_peak_chro)

idx_bottom_of_slope_chro, idx_peak_chro, mag_low_nbinsK_chro, mag_high_nbinsK_chro
Out[52]:
([-12.546, -11.804], [-10.32, -9.578], -12.175, -9.949)
In [53]:
from scipy.optimize import curve_fit


xcol = 'mag_chro'
ycol = 'gc_sq'

x = df.query("""  @mag_low_nbinsK_chro < mag_chro <  @mag_high_nbinsK_chro  """)[xcol].to_numpy()
y = df.query("""  @mag_low_nbinsK_chro < mag_chro <  @mag_high_nbinsK_chro  """)[ycol].to_numpy()

def func(x, a, b):
    return a*x + b

params, _ = curve_fit(func, x, y)
[a, b] = params.round(2)

print(f'magnitude ranges for chro: {mag_low_nbinsK_chro}, {mag_high_nbinsK_chro}')
print(f'fitting params   for chro: {a}, {b}' )
magnitude ranges for chro: -12.175, -9.949
fitting params   for chro: 0.04, 0.51
In [54]:
mag_low_nbinsK_chro
Out[54]:
-12.175
In [55]:
def magnitude_weight_chro(mag):
    if mag < mag_low_nbinsK_chro:
        return 1/ (mag_low_nbinsK_chro*a + b)
    
    else:
        return 1/ (a*mag + b)

df['wt_mag_chro'] = df['mag_chro'].apply(magnitude_weight_chro)
df['wt_mag_chro'] = df['wt_mag_chro'] / df['wt_mag_chro'].mean() # normalize by mean

# mean
df['wt_mag']      = (df['wt_mag_mono'] + df['wt_mag_chro']) / 2

# df.drop(['wt_mag_chro','wt_mag_mono'],axis=1,inplace=True)

df.iloc[:2,-7:]
Out[55]:
g_sq_bins above bins_mag_mono bins_mag_chro wt_mag_mono wt_mag_chro wt_mag
0 (0.16, 0.2] True (-12.524, -11.775] (-12.546, -11.804] 1.789438 1.816861 1.803149
2 (1.198, 1.238] True (-9.528, -8.779] (-9.578, -8.837] 0.280263 0.272254 0.276258

Ellipticity Components Transformation

c2 = (dx * dx - dy * dy) / (r * r);
s2 = 2 * dx * dy / (r * r);
eX = s2 * e[0] + c2 * e[1];
eesum += eX * eX * w[0] * w[0];
eTsum[bin] -= (c2 * e[0] + s2 * e[1]) * w[0];
In [56]:
df.head(2)
Out[56]:
fN[0][0] fN[1][0] fN[2][0] fN[3][0] id[0][0] id[1][0] id[2][0] id[3][0] x[0] x[1] errx[0][0] errx[0][1] errx[1][0] errx[1][1] errx[2][0] errx[2][1] errx[3][0] errx[3][1] g[0][0] g[0][1] g[1][0] g[1][1] g[2][0] g[2][1] g[3][0] g[3][1] ellip[0][0] ellip[1][0] ellip[2][0] ellip[3][0] flux[0][0] flux[1][0] flux[2][0] flux[3][0] radius[0][0] radius[1][0] radius[2][0] radius[3][0] mag[0][0] mag[1][0] mag[2][0] mag[3][0] gm[0] gm[1] gc[0] gc[1] gmd[0] gmd[1] gcd[0] gcd[1] g_sq gmd_sq gm_sq gc_sq mag_mono mag_chro g_sq_bins above bins_mag_mono bins_mag_chro wt_mag_mono wt_mag_chro wt_mag
0 0 0 0 0 5301 5314 5231 5117 88.17075 1847.1934 0.0196 0.0249 0.0227 0.0216 0.0200 0.0256 0.0231 0.0220 -0.4253 0.1855 0.2730 -0.3021 -0.4257 0.1904 0.2778 -0.3155 0.463994 0.407177 0.466340 0.420373 79841.4700 82737.3540 80303.923 83923.9080 5.186953 5.293858 5.267827 5.390682 -12.255571 -12.294254 -12.261842 -12.309714 -0.07615 -0.0583 -0.07395 -0.06255 -0.34915 0.2438 -0.35175 0.25295 0.215290 0.181344 0.009198 0.009381 -12.274912 -12.285778 (0.16, 0.2] True (-12.524, -11.775] (-12.546, -11.804] 1.789438 1.816861 1.803149
2 0 0 0 0 1301 1310 1323 1312 2652.56650 1772.3448 0.2510 0.1715 0.1663 0.3002 0.2522 0.1715 0.1665 0.3017 0.9614 0.5881 -0.9979 -0.4635 1.0062 0.6076 -1.0206 -0.4729 1.127010 1.100289 1.175422 1.124837 3694.2411 3674.4453 3684.164 3663.4596 4.161950 4.303319 4.159870 4.301257 -8.918813 -8.912980 -8.915847 -8.909728 -0.01825 0.0623 -0.00720 0.06735 0.97965 0.5258 1.01340 0.54025 1.270152 1.236180 0.004214 0.004588 -8.915896 -8.912788 (1.198, 1.238] True (-9.528, -8.779] (-9.578, -8.837] 0.280263 0.272254 0.276258
In [57]:
# constants
RMIN = 10
DLNR = 0.5

df['dx'] = df['x[0]'] - 1699 # jesisim output fitsfiles have shape 3398, 3398
df['dy'] = df['x[1]'] - 1699

df['r'] = np.hypot(df['dx'], df['dy'])
# df['r'] = np.sqrt(df['dx']**2 + df['dy']**2)

df['cos2theta'] = df.eval(' (dx * dx - dy * dy) / (r * r)' )
df['sin2theta'] = df.eval(' (2  * dx * dy     ) / (r * r)' )

df['bin'] = ( np.log(df.r / RMIN) / DLNR).astype(int)

df['bin'].value_counts()
Out[57]:
9     24371
10    22321
8      9617
7      3947
6      1425
5       168
3        76
4        20
2        12
0         8
1         7
Name: bin, dtype: int64
In [58]:
df['eX_mono'] =       df['sin2theta'] * df['gm[0]'] + df['cos2theta'] * df['gm[1]']
df['eT_mono'] = -1 * (df['cos2theta'] * df['gm[0]'] + df['sin2theta'] * df['gm[1]'] ) 

df['eX_chro'] =       df['sin2theta'] * df['gc[0]'] + df['cos2theta'] * df['gc[1]']
df['eT_chro'] = -1 * (df['cos2theta'] * df['gc[0]'] + df['sin2theta'] * df['gc[1]']  )
In [59]:
df['eT_mono_times_wt'] = df['eT_mono'] * df['wt_mag']
df['eT_chro_times_wt'] = df['eT_chro'] * df['wt_mag']
In [60]:
df['eX_monosq_times_wt'] = df['eX_mono'] *  df['eX_mono'] * df['wt_mag']
df['eX_chrosq_times_wt'] = df['eX_chro'] *  df['eX_chro'] * df['wt_mag']

df['eXsq_times_wt'] = df.eval(" (eX_monosq_times_wt + eX_chrosq_times_wt) / 2 ")

df.iloc[:2,-6:]
Out[60]:
eT_chro eT_mono_times_wt eT_chro_times_wt eX_monosq_times_wt eX_chrosq_times_wt eXsq_times_wt
0 0.061296 0.115825 0.110526 0.003401 0.004156 0.003778
2 -0.003184 0.002350 -0.000880 0.000954 0.001184 0.001069

Error Analysis

https://www.phenix.bnl.gov/WWW/publish/elke/EIC/Files-for-Wiki/lara.02-008.errors.pdf

When the statistics involved in calculating $E_A$ and $E_B$ are not indendent, the error for a function $f(E_A, E_B)$ has the expression: $$ \sigma_{f}=\sqrt{\left(\frac{\partial f}{\partial E_{A}} \sigma_{A}\right)^{2}+\left(\frac{\partial f}{\partial E_{B}} \sigma_{B}\right)^{2}+2 \frac{\partial f}{\partial E_{A}} \frac{\partial f}{\partial E_{B}} \operatorname{cov}\left(E_{A}, E_{B}\right)} $$

where the last term takes care of the correlations between $E_A$ and $E_B$. Given a large number $N$ of measurements $E_{A_i}$, the standard deviation $\sigma_A$ is empirically defined as:

$$ \sigma_{A}^{2}=\frac{1}{N-1} \sum_{i=1}^{N}\left(E_{A_{i}}-E_{A}\right)^{2} $$

while the covariance between $E_A$ and $E_B$ is given by: $$ \operatorname{cov}\left(E_{A}, E_{B}\right)=\frac{1}{N-1} \sum_{i=1}^{N}\left(E_{A_{i}}-E_{A}\right)\left(E_{B_{i}}-E_{B}\right) $$

where $E_A$ and $E_B$ are the averages of $E_{A_i}$ and $E_{B_i}$. When $E_A$ and $E_B$ are independent, over a large number $N$ of measurements they will fluctuate around their average in an uncorrelated way, so that the covariance is zero and one recovers the usual formula for the propagation of errors in a function of independent variables.

In [61]:
"""
f = m/c
df/dm = 1/c
df/dc = -m/c2

s2 ==> sigma
s2 = r2 (sm2/m2 + sc2/c2 -2/m/c cov(m,c))

put m=c,
s2 = 2/m2 (sm2 - cov)

error = sigma/sqrt(n)

error = 
            0.000332
      sqrt  --------
             eT(bin)**2
    ---------------------
     sqrt(ngals_bin)
""";
In [62]:
cov = df[['eT_chro','eT_mono']].cov()
cov
Out[62]:
eT_chro eT_mono
eT_chro 0.034378 0.034057
eT_mono 0.034057 0.034060
In [63]:
diag_mean = np.diag(cov).mean()
diag_mean
Out[63]:
0.034219209292557035
In [64]:
offdiag = cov.iloc[0,1]
offdiag
Out[64]:
0.03405671922115891
In [65]:
diag_mean - offdiag
Out[65]:
0.00016249007139812477
In [66]:
myerr = (diag_mean - offdiag) * 2
myerr
Out[66]:
0.00032498014279624954
In [67]:
# temporary value to calculate error
# err_coeff = 0.000332

err_coeff = myerr

df['err_numerator'] = myerr  # np.sqrt(err_coeff/df['eT_mono']/df['eT_mono'])
df['eX_monosq_times_wt_std'] = df['eX_monosq_times_wt'].std()
df['eX_chrosq_times_wt_std'] = df['eX_chrosq_times_wt'].std()
df['eT_ratio'] = df['eT_mono'] / df['eT_chro']
df['eT_ratio_std'] = df['eT_ratio'].std()
df.iloc[:2,-5:]
Out[67]:
err_numerator eX_monosq_times_wt_std eX_chrosq_times_wt_std eT_ratio eT_ratio_std
0 0.000325 0.065307 0.066161 1.047939 119.500671
2 0.000325 0.065307 0.066161 -2.671767 119.500671

Radial Binnings

In [68]:
# constants
RMIN = 10
DLNR = 0.5
In [69]:
# renaming aggegration needs pandas > 0.25
pd.__version__
Out[69]:
'1.0.0rc0'
In [70]:
df.filter(regex='times|err_numerator').head().round(1)
Out[70]:
eT_mono_times_wt eT_chro_times_wt eX_monosq_times_wt eX_chrosq_times_wt eXsq_times_wt err_numerator eX_monosq_times_wt_std eX_chrosq_times_wt_std
0 0.1 0.1 0.0 0.0 0.0 0.0 0.1 0.1
2 0.0 -0.0 0.0 0.0 0.0 0.0 0.1 0.1
4 -0.0 -0.0 0.0 0.0 0.0 0.0 0.1 0.1
8 0.2 0.2 0.0 0.0 0.0 0.0 0.1 0.1
16 0.1 0.1 0.0 0.0 0.0 0.0 0.1 0.1
In [71]:
df_radial_bins = df.groupby('bin').agg(
    r_mean                 = ('r', 'mean'),
    wt_mag_sum             = ('wt_mag', 'sum'),
    eT_mono_times_wt_sum   = ('eT_mono_times_wt', 'sum'),
    eT_chro_times_wt_sum   = ('eT_chro_times_wt', 'sum'),
    eX_monosq_times_wt_sum = ('eX_monosq_times_wt', 'sum'),
    eX_chrosq_times_wt_sum = ('eX_chrosq_times_wt', 'sum'),
    err_numerator_mean     = ('err_numerator', 'mean')
                                       )


# bin counts
df_radial_bins['bin_count'] = df['bin'].value_counts()

# columns after binning 
df_radial_bins['eT_mean_mono'] = (df_radial_bins['eT_mono_times_wt_sum'] / 
                                  df_radial_bins['wt_mag_sum'])

df_radial_bins['eT_mean_chro'] = (df_radial_bins['eT_chro_times_wt_sum'] /
                                  df_radial_bins['wt_mag_sum'])
               
df_radial_bins['eX_mean_mono'] = np.sqrt(df_radial_bins['eX_monosq_times_wt_sum'] /
                                         df_radial_bins['wt_mag_sum'])
                                                     
df_radial_bins['eX_mean_chro'] = np.sqrt(df_radial_bins['eX_chrosq_times_wt_sum'] /
                                         df_radial_bins['wt_mag_sum'])
               
df_radial_bins['eT_ratio']     = (df_radial_bins['eT_mean_mono'] /
                                  df_radial_bins['eT_mean_chro'])


# Errors
sqrt_bin = np.sqrt(df_radial_bins['bin_count'])
df_radial_bins['eT_mono_err'] = df_radial_bins['eX_mean_mono'] / sqrt_bin
df_radial_bins['eT_chro_err'] = df_radial_bins['eX_mean_chro'] / sqrt_bin
df_radial_bins['eT_ratio_err'] = ( np.sqrt(df_radial_bins['err_numerator_mean']/
                                  df_radial_bins['eT_mean_mono'] /
                                  df_radial_bins['eT_mean_chro']) /
                                   sqrt_bin)

print('Statistics for different radial bins')
print(f'RMIN = {RMIN} and DLNR = {DLNR}')

df_radial_bins.style\
.background_gradient(subset=['eT_ratio_err'],cmap='Blues')
Statistics for different radial bins
RMIN = 10 and DLNR = 0.5
Out[71]:
r_mean wt_mag_sum eT_mono_times_wt_sum eT_chro_times_wt_sum eX_monosq_times_wt_sum eX_chrosq_times_wt_sum err_numerator_mean bin_count eT_mean_mono eT_mean_chro eX_mean_mono eX_mean_chro eT_ratio eT_mono_err eT_chro_err eT_ratio_err
bin
0 12.168337 2.740817 -0.190132 -0.169894 0.449066 0.452275 0.000325 8 -0.069371 -0.061987 0.404776 0.406220 1.119121 0.143110 0.143620 0.097196
1 20.675363 2.961441 -0.006014 -0.025063 0.365031 0.397593 0.000325 7 -0.002031 -0.008463 0.351086 0.366411 0.239973 0.132698 0.138490 1.643481
2 37.586384 12.750447 -2.154854 -2.221593 1.095048 1.144964 0.000325 12 -0.169002 -0.174236 0.293058 0.299663 0.969959 0.084599 0.086505 0.030326
3 60.135224 67.558776 14.671218 15.003910 4.443602 4.730154 0.000325 76 0.217162 0.222087 0.256464 0.264604 0.977826 0.029418 0.030352 0.009416
4 84.499712 13.841928 5.393853 5.471342 2.621588 2.665473 0.000325 20 0.389675 0.395273 0.435195 0.438822 0.985837 0.097313 0.098124 0.010271
5 177.775984 162.038743 82.273276 82.692797 27.988934 28.717482 0.000325 168 0.507738 0.510327 0.415608 0.420982 0.994927 0.032065 0.032479 0.002732
6 273.310843 1363.031799 541.093671 544.992354 147.778729 150.455795 0.000325 1425 0.396978 0.399838 0.329271 0.332240 0.992846 0.008723 0.008801 0.001199
7 444.794988 3905.405324 872.047624 878.591836 175.069995 178.250233 0.000325 3947 0.223292 0.224968 0.211725 0.213640 0.992551 0.003370 0.003401 0.001280
8 733.219672 9552.465750 1209.383715 1215.541911 262.178904 266.833729 0.000325 9617 0.126604 0.127249 0.165669 0.167133 0.994934 0.001689 0.001704 0.001448
9 1212.972413 24514.286360 1698.625812 1709.432283 504.922333 509.768551 0.000325 24371 0.069291 0.069732 0.143517 0.144204 0.993678 0.000919 0.000924 0.001661
10 1740.669831 22374.918614 906.628513 911.110620 446.450157 445.387188 0.000325 22321 0.040520 0.040720 0.141256 0.141087 0.995081 0.000945 0.000944 0.002971
In [72]:
# why some eT values are -ve?
"""
1. For given rmin and dlnr we have some bins very few object counts.

""";
In [73]:
pd.cut(df['eT_mono'],20).value_counts().to_frame().style.background_gradient()
Out[73]:
eT_mono
(-0.00253, 0.112] 30161
(0.112, 0.227] 12368
(-0.117, -0.00253] 5976
(0.227, 0.342] 4292
(0.342, 0.457] 2190
(-0.232, -0.117] 1823
(0.457, 0.571] 1213
(-0.347, -0.232] 1054
(-0.462, -0.347] 853
(0.571, 0.686] 659
(-0.576, -0.462] 618
(-0.691, -0.576] 364
(0.686, 0.801] 198
(-0.806, -0.691] 139
(-0.921, -0.806] 28
(0.801, 0.916] 25
(-1.038, -0.921] 4
(1.03, 1.145] 3
(0.916, 1.03] 2
(1.145, 1.26] 2
In [74]:
fig,ax = plt.subplots(figsize=(12,8))
sns.distplot(df['eT_mono'],ax=ax)
plt.title('Histogram and density plot of eT_mono');
In [75]:
df['wt_mag'].hist(bins=100,figsize=(12,8));
plt.title('Histogram of wt_mag')
plt.xlabel('wt_mag')
plt.ylabel('Frequency');
plt.savefig('images/weights.png',dpi=300)
In [76]:
df.query(""" r>500 """)['eT_mono'].describe().to_frame()
Out[76]:
eT_mono
count 57249.000000
mean 0.064994
std 0.171199
min -1.035398
25% 0.019119
50% 0.068617
75% 0.126712
max 1.259864
In [77]:
df['gm_sq'].hist(bins=100,figsize=(12,8))
plt.title('Histogram of gm_sq')
plt.ylabel('Frequency')
plt.xlabel('gm_sq')
plt.savefig('images/gm_sq_hist_after_cleaning.png',dpi=300)
In [78]:
df.head(2)
Out[78]:
fN[0][0] fN[1][0] fN[2][0] fN[3][0] id[0][0] id[1][0] id[2][0] id[3][0] x[0] x[1] errx[0][0] errx[0][1] errx[1][0] errx[1][1] errx[2][0] errx[2][1] errx[3][0] errx[3][1] g[0][0] g[0][1] g[1][0] g[1][1] g[2][0] g[2][1] g[3][0] g[3][1] ellip[0][0] ellip[1][0] ellip[2][0] ellip[3][0] flux[0][0] flux[1][0] flux[2][0] flux[3][0] radius[0][0] radius[1][0] radius[2][0] radius[3][0] mag[0][0] mag[1][0] mag[2][0] mag[3][0] gm[0] gm[1] gc[0] gc[1] gmd[0] gmd[1] gcd[0] gcd[1] g_sq gmd_sq gm_sq gc_sq mag_mono mag_chro g_sq_bins above bins_mag_mono bins_mag_chro wt_mag_mono wt_mag_chro wt_mag dx dy r cos2theta sin2theta bin eX_mono eT_mono eX_chro eT_chro eT_mono_times_wt eT_chro_times_wt eX_monosq_times_wt eX_chrosq_times_wt eXsq_times_wt err_numerator eX_monosq_times_wt_std eX_chrosq_times_wt_std eT_ratio eT_ratio_std
0 0 0 0 0 5301 5314 5231 5117 88.17075 1847.1934 0.0196 0.0249 0.0227 0.0216 0.0200 0.0256 0.0231 0.0220 -0.4253 0.1855 0.2730 -0.3021 -0.4257 0.1904 0.2778 -0.3155 0.463994 0.407177 0.466340 0.420373 79841.4700 82737.3540 80303.923 83923.9080 5.186953 5.293858 5.267827 5.390682 -12.255571 -12.294254 -12.261842 -12.309714 -0.07615 -0.0583 -0.07395 -0.06255 -0.34915 0.2438 -0.35175 0.25295 0.215290 0.181344 0.009198 0.009381 -12.274912 -12.285778 (0.16, 0.2] True (-12.524, -11.775] (-12.546, -11.804] 1.789438 1.816861 1.803149 -1610.82925 148.1934 1617.631650 0.983215 -0.182452 10 -0.043428 0.064235 -0.048008 0.061296 0.115825 0.110526 0.003401 0.004156 0.003778 0.000325 0.065307 0.066161 1.047939 119.500671
2 0 0 0 0 1301 1310 1323 1312 2652.56650 1772.3448 0.2510 0.1715 0.1663 0.3002 0.2522 0.1715 0.1665 0.3017 0.9614 0.5881 -0.9979 -0.4635 1.0062 0.6076 -1.0206 -0.4729 1.127010 1.100289 1.175422 1.124837 3694.2411 3674.4453 3684.164 3663.4596 4.161950 4.303319 4.159870 4.301257 -8.918813 -8.912980 -8.915847 -8.909728 -0.01825 0.0623 -0.00720 0.06735 0.97965 0.5258 1.01340 0.54025 1.270152 1.236180 0.004214 0.004588 -8.915896 -8.912788 (1.198, 1.238] True (-9.528, -8.779] (-9.578, -8.837] 0.280263 0.272254 0.276258 953.56650 73.3448 956.383045 0.988237 0.152928 9 0.058776 0.008508 0.065457 -0.003184 0.002350 -0.000880 0.000954 0.001184 0.001069 0.000325 0.065307 0.066161 -2.671767 119.500671

Plot of eT for mono and chro

In [79]:
def plot_eT_mean(rmin, dlnr,min_bin_count,
                 show_ratio=True,
                 show_mono_chro=False,
                 show_data=True):
    # title
    title = f'rmin={rmin}, dlnr={dlnr}, min_bin_count={min_bin_count}'
    
    # radial bin
    df['bin'] = ( np.log(df.r / rmin) / dlnr).astype(int)

    # aggregations per given bin
    df_radial_bins = df.groupby('bin').agg(
        r_mean                 = ('r', 'mean'),
        wt_mag_sum             = ('wt_mag', 'sum'),
        eT_mono_times_wt_sum   = ('eT_mono_times_wt', 'sum'),
        eT_chro_times_wt_sum   = ('eT_chro_times_wt', 'sum'),
        eX_monosq_times_wt_sum = ('eX_monosq_times_wt', 'sum'),
        eX_chrosq_times_wt_sum = ('eX_chrosq_times_wt', 'sum'),
        err_numerator_mean     = ('err_numerator', 'mean')
                                           )
    
    # bin counts
    df_radial_bins['bin_count'] = df['bin'].value_counts()
    df_radial_bins = df_radial_bins.query(""" bin_count > @min_bin_count """) 
    df_radial_bins = df_radial_bins.iloc[:-1,:]

    # columns after binning 
    df_radial_bins['eT_mean_mono'] = (df_radial_bins['eT_mono_times_wt_sum'] / 
                                      df_radial_bins['wt_mag_sum'])

    df_radial_bins['eT_mean_chro'] = (df_radial_bins['eT_chro_times_wt_sum'] /
                                      df_radial_bins['wt_mag_sum'])

    df_radial_bins['eX_mean_mono'] = np.sqrt(
        df_radial_bins['eX_monosq_times_wt_sum'] /
        df_radial_bins['wt_mag_sum']
    )

    df_radial_bins['eX_mean_chro'] = np.sqrt(
        df_radial_bins['eX_chrosq_times_wt_sum'] /
        df_radial_bins['wt_mag_sum']
    )

    df_radial_bins['eT_ratio']     = (df_radial_bins['eT_mean_mono'] /
                                      df_radial_bins['eT_mean_chro'])

    
    # Errors
    sqrt_bin = np.sqrt(df_radial_bins['bin_count'])
    df_radial_bins['eT_mono_err'] = df_radial_bins['eX_mean_mono'] / sqrt_bin
    df_radial_bins['eT_chro_err'] = df_radial_bins['eX_mean_chro'] / sqrt_bin
    df_radial_bins['eT_ratio_err'] = ( np.sqrt(df_radial_bins['err_numerator_mean']/
                                  df_radial_bins['eT_mean_mono'] /
                                  df_radial_bins['eT_mean_chro']) /
                                   sqrt_bin)
 
    # also plot eT mono and eT chro
    if show_ratio:
        fig, ax = plt.subplots(1,1,figsize=(12,8))
        df_radial_bins.plot.line(x='r_mean',y='eT_ratio',
                                 yerr='eT_ratio_err',
                                 marker='o',color='b',ax=ax)
        ax.set_xlabel('Radius',fontsize=18)
        ax.set_ylabel(r'$\frac{eT_{mono}}{eT_{chro}}$',fontsize=18)
        plt.ylim(0.95, 1.0)
        
    if show_mono_chro:
        fig, ax = plt.subplots(3,1,figsize=(14,12))
        
        # top
        df_radial_bins.plot.line(x='r_mean',y='eT_mean_mono',
                                 yerr='eT_mono_err',
                                 marker='o',color='r', ax=ax[0])
        
        # top
        df_radial_bins.plot.line(x='r_mean',y='eT_mean_chro',
                                 yerr='eT_chro_err',
                                 marker='o',color='b',ax=ax[0])
        # middle
        df_radial_bins.plot.line(x='r_mean',y='eT_mean_mono',
                                 yerr='eT_mono_err',
                                 marker='o',color='r', ax=ax[1])
        
        # bottom
        df_radial_bins.plot.line(x='r_mean',y='eT_mean_chro',
                                 yerr='eT_chro_err',
                                 marker='o',color='b',ax=ax[2])
        
        # label
        ax[0].set_xlabel('')
        ax[1].set_xlabel('')
        ax[2].set_xlabel('Radius',fontsize=18)
        ax[0].set_ylabel('eT',fontsize=18)
        ax[1].set_ylabel('eT_mean_mono',fontsize=18)
        ax[2].set_ylabel('eT_mean_chro',fontsize=18)
        ax[0].set_xlim(0,1800)
        ax[1].set_xlim(0,1800)
        ax[2].set_xlim(0,1800)

    plt.suptitle(f'Plot of eT for {title}',fontsize=24,weight='bold')
    
    # also show the dataframe
    if show_data:
        display(df_radial_bins.style.background_gradient(subset=['eT_mean_mono']))
    
    
    plt.xlim(0,1800)
    plt.savefig('images/eT_versus_radius.png',dpi=300)
    plt.tight_layout()
    plt.show()

# plot now
plot_eT_mean(rmin=300, dlnr=0.5,min_bin_count=10,
            show_ratio=True,show_mono_chro=False)
r_mean wt_mag_sum eT_mono_times_wt_sum eT_chro_times_wt_sum eX_monosq_times_wt_sum eX_chrosq_times_wt_sum err_numerator_mean bin_count eT_mean_mono eT_mean_chro eX_mean_mono eX_mean_chro eT_ratio eT_mono_err eT_chro_err eT_ratio_err
bin
-3 55.871651 60.755237 9.896165 10.061877 3.619479 3.954027 0.000325 63 0.162886 0.165613 0.244079 0.255110 0.983531 0.030751 0.032141 0.013828
-2 75.471224 26.059639 8.712730 8.855116 3.580052 3.672126 0.000325 36 0.334338 0.339802 0.370647 0.375383 0.983921 0.061774 0.062564 0.008914
-1 163.657925 91.785470 48.254966 48.709079 15.971869 16.621183 0.000325 92 0.525736 0.530684 0.417149 0.425544 0.990677 0.043491 0.044366 0.003558
0 366.024952 4266.880418 1255.499974 1264.821614 298.706786 304.076804 0.000325 4381 0.294243 0.296428 0.264586 0.266954 0.992630 0.003997 0.004033 0.000922
1 664.748677 8074.710478 1134.598444 1139.743149 236.368209 240.189942 0.000325 8075 0.140513 0.141150 0.171093 0.172470 0.995486 0.001904 0.001919 0.001424
2 1099.445749 20225.835545 1590.518926 1600.398088 431.848123 437.309033 0.000325 20219 0.078638 0.079126 0.146121 0.147042 0.993827 0.001028 0.001034 0.001607
3 1657.612548 28846.089374 1271.521961 1278.947120 574.412700 574.238202 0.000325 28727 0.044080 0.044337 0.141113 0.141092 0.994194 0.000833 0.000832 0.002406
In [80]:
plot_eT_mean(rmin=300, dlnr=0.5,min_bin_count=10,
            show_ratio=False,show_mono_chro=True)
r_mean wt_mag_sum eT_mono_times_wt_sum eT_chro_times_wt_sum eX_monosq_times_wt_sum eX_chrosq_times_wt_sum err_numerator_mean bin_count eT_mean_mono eT_mean_chro eX_mean_mono eX_mean_chro eT_ratio eT_mono_err eT_chro_err eT_ratio_err
bin
-3 55.871651 60.755237 9.896165 10.061877 3.619479 3.954027 0.000325 63 0.162886 0.165613 0.244079 0.255110 0.983531 0.030751 0.032141 0.013828
-2 75.471224 26.059639 8.712730 8.855116 3.580052 3.672126 0.000325 36 0.334338 0.339802 0.370647 0.375383 0.983921 0.061774 0.062564 0.008914
-1 163.657925 91.785470 48.254966 48.709079 15.971869 16.621183 0.000325 92 0.525736 0.530684 0.417149 0.425544 0.990677 0.043491 0.044366 0.003558
0 366.024952 4266.880418 1255.499974 1264.821614 298.706786 304.076804 0.000325 4381 0.294243 0.296428 0.264586 0.266954 0.992630 0.003997 0.004033 0.000922
1 664.748677 8074.710478 1134.598444 1139.743149 236.368209 240.189942 0.000325 8075 0.140513 0.141150 0.171093 0.172470 0.995486 0.001904 0.001919 0.001424
2 1099.445749 20225.835545 1590.518926 1600.398088 431.848123 437.309033 0.000325 20219 0.078638 0.079126 0.146121 0.147042 0.993827 0.001028 0.001034 0.001607
3 1657.612548 28846.089374 1271.521961 1278.947120 574.412700 574.238202 0.000325 28727 0.044080 0.044337 0.141113 0.141092 0.994194 0.000833 0.000832 0.002406

Interactive plots

In [81]:
import plotly.graph_objs as go
import plotly.figure_factory as ff
from plotly import tools
from plotly.offline import plot, iplot, init_notebook_mode
init_notebook_mode(connected=False)
In [82]:
df.filter(regex='wt').head(2)
Out[82]:
wt_mag_mono wt_mag_chro wt_mag eT_mono_times_wt eT_chro_times_wt eX_monosq_times_wt eX_chrosq_times_wt eXsq_times_wt eX_monosq_times_wt_std eX_chrosq_times_wt_std
0 1.789438 1.816861 1.803149 0.115825 0.110526 0.003401 0.004156 0.003778 0.065307 0.066161
2 0.280263 0.272254 0.276258 0.002350 -0.000880 0.000954 0.001184 0.001069 0.065307 0.066161
In [83]:
# I need pandas > 0.25 for multiple agg
pd.__version__
Out[83]:
'1.0.0rc0'
In [84]:
def plotly_eT_plot(rmin=400, dlnr=0.4, min_bin_count=20,
                   show_mono_chro=False,
                   show_mono=False,
                   show_chro=False,
                   show_data=True,
                  ):
    
    # title
    title=f'rmin={rmin}, dlnr={dlnr}, min_bin_count={min_bin_count}'
    
    # radial bin
    df['bin'] = ( np.log(df.r / rmin) / dlnr).astype(int)

    # aggregations per given bin
    df_radial_bins = df.groupby('bin').agg(
        r_mean                 = ('r', 'mean'),
        wt_mag_sum             = ('wt_mag', 'sum'),
        eT_mono_times_wt_sum   = ('eT_mono_times_wt', 'sum'),
        eT_chro_times_wt_sum   = ('eT_chro_times_wt', 'sum'),
        eX_monosq_times_wt_sum = ('eX_monosq_times_wt', 'sum'),
        eX_chrosq_times_wt_sum = ('eX_chrosq_times_wt', 'sum'),
        err_numerator_mean     = ('err_numerator', 'mean')
                                           )
    

    # bin counts
    df_radial_bins['bin_count'] = df['bin'].value_counts()
    df_radial_bins = df_radial_bins.query(""" bin_count > @min_bin_count """) 
    df_radial_bins = df_radial_bins.iloc[:-1,:]
        
    # columns after binning 
    df_radial_bins['eT_mean_mono'] = (df_radial_bins['eT_mono_times_wt_sum'] / 
                                      df_radial_bins['wt_mag_sum'])

    df_radial_bins['eT_mean_chro'] = (df_radial_bins['eT_chro_times_wt_sum'] /
                                      df_radial_bins['wt_mag_sum'])

    df_radial_bins['eX_mean_mono'] = np.sqrt(
        df_radial_bins['eX_monosq_times_wt_sum'] /
        df_radial_bins['wt_mag_sum']
    )

    df_radial_bins['eX_mean_chro'] = np.sqrt(
        df_radial_bins['eX_chrosq_times_wt_sum'] /
        df_radial_bins['wt_mag_sum']
    )

    df_radial_bins['eT_ratio']     = (df_radial_bins['eT_mean_mono'] /
                                      df_radial_bins['eT_mean_chro'])

    
    # Errors
    sqrt_bin = np.sqrt(df_radial_bins['bin_count'])
    df_radial_bins['eT_mono_err'] = df_radial_bins['eX_mean_mono'] / sqrt_bin
    df_radial_bins['eT_chro_err'] = df_radial_bins['eX_mean_chro'] / sqrt_bin
    df_radial_bins['eT_ratio_err'] = ( np.sqrt(df_radial_bins['err_numerator_mean']/
                                  df_radial_bins['eT_mean_mono'] /
                                  df_radial_bins['eT_mean_chro']) /
                                   sqrt_bin)
    
    # remove first bin of strong lensing
    df_radial_bins = df_radial_bins.iloc[1:,:]
    
    # eT mono vs chro ratio
    sc = go.Scatter( x=df_radial_bins['r_mean'],
                     y=df_radial_bins['eT_ratio'],
                     mode = 'lines+markers',
                     name = 'eT ratio',
                     error_y = dict(
                         type='data',
                         array=df_radial_bins['eT_ratio_err'],
                         visible=True,
                         )
                   )
    
    mydata = [sc]
    
    # layout
    layout = go.Layout(
                    title={
                        'text': title,
                        'y':0.9,
                        'x':0.5,
                        'xanchor': 'center',
                        'yanchor': 'top'},
                    xaxis=dict(
                               title='radius',
                               titlefont=dict(
                               family='Courier New, monospace',
                               size=18,
                               color='#7f7f7f')),
                     yaxis=dict(title='eT',
                                titlefont=dict(
                                          family='Courier New, monospace',
                                          size=18,
                                          color='#7f7f7f')))
    
    myfig = go.Figure(data=mydata, layout=layout)

    
    # also plot eT mono and chro
    # monochromatic
    sc1 = go.Scatter(x=df_radial_bins['r_mean'],
                     y=df_radial_bins['eT_mean_mono'],
                     mode = 'lines+markers',
                     name = 'eT mono',
                     error_y = dict(
                         type='data',
                         array=df_radial_bins['eT_mono_err'],
                         visible=True,
                         )
                    )
        # chromatic
    sc2 = go.Scatter(x=df_radial_bins['r_mean'],
                     y=df_radial_bins['eT_mean_chro'],
                     mode = 'lines+markers',
                     name = 'eT chro',
                     error_y = dict(
                         type='data',
                         array=df_radial_bins['eT_chro_err'],
                         visible=True,
                         )
                    )
    if show_mono_chro:
        mydata= [sc1,sc2]
        myfig = go.Figure(data=mydata, layout=layout)
        
    if show_mono:
        mydata = [sc1]
        myfig = go.Figure(data=mydata, layout=layout)
        myfig.update_layout(
                    xaxis_title="Radius",
                    yaxis_title="eT_mono",
                )
        myfig.update_layout(
                    title={
                        'text': "eT_mono vs radius plot",
                        'y':0.9,
                        'x':0.5,
                        'xanchor': 'center',
                        'yanchor': 'top'}
        )
        
    if show_chro:
        mydata = [sc2]
        myfig = go.Figure(data=mydata, layout=layout)
        myfig.update_layout(
                    xaxis_title="Radius",
                    yaxis_title="eT_chro",
                )
        myfig.update_layout(
                    title={
                        'text': "eT_chro vs radius plot",
                        'y':0.9,
                        'x':0.5,
                        'xanchor': 'center',
                        'yanchor': 'top'})
    
    # this is for all cases
    myfig.update_layout(showlegend=True)

    
    # also show the dataframe
    if show_data:
        display(df_radial_bins.style.background_gradient(subset=['eT_mean_mono']))

    # iplot plots in jupyter-notebook, plot opens in new tab.
    return iplot(myfig, filename='myplot.html')

# plot
plotly_eT_plot(rmin=400, dlnr=0.4, min_bin_count=20,
                   show_mono_chro=False,
                   show_mono=False,
                   show_chro=False,
                   show_data=True,
                  )
r_mean wt_mag_sum eT_mono_times_wt_sum eT_chro_times_wt_sum eX_monosq_times_wt_sum eX_chrosq_times_wt_sum err_numerator_mean bin_count eT_mean_mono eT_mean_chro eX_mean_mono eX_mean_chro eT_ratio eT_mono_err eT_chro_err eT_ratio_err
bin
-4 65.888307 60.804742 17.762141 18.109758 5.717455 6.014620 0.000325 68 0.292118 0.297835 0.306643 0.314511 0.980805 0.037186 0.038140 0.007412
-2 163.559444 84.090672 43.960071 44.423741 15.043699 15.720483 0.000325 84 0.522770 0.528284 0.422964 0.432373 0.989563 0.046149 0.047176 0.003743
-1 232.817624 689.456146 308.411024 310.353776 89.134889 90.749371 0.000325 703 0.447325 0.450143 0.359559 0.362801 0.993740 0.013561 0.013683 0.001515
0 450.603641 5852.545949 1342.451010 1351.830777 288.496628 293.651898 0.000325 5937 0.229379 0.230982 0.222023 0.223998 0.993061 0.002881 0.002907 0.001016
1 750.370496 8032.158589 979.482756 984.570240 213.226901 217.050732 0.000325 8117 0.121945 0.122579 0.162931 0.164386 0.994833 0.001808 0.001825 0.001637
2 1121.649708 17192.646611 1306.250138 1313.676973 363.038567 367.128281 0.000325 17142 0.075977 0.076409 0.145313 0.146129 0.994347 0.001110 0.001116 0.001807
3 1605.922300 26998.337152 1241.464746 1249.260972 531.295969 532.147537 0.000325 26830 0.045983 0.046272 0.140281 0.140394 0.993759 0.000856 0.000857 0.002386
In [85]:
plotly_eT_plot(rmin=300, dlnr=0.5,min_bin_count=20,show_mono_chro=True)
r_mean wt_mag_sum eT_mono_times_wt_sum eT_chro_times_wt_sum eX_monosq_times_wt_sum eX_chrosq_times_wt_sum err_numerator_mean bin_count eT_mean_mono eT_mean_chro eX_mean_mono eX_mean_chro eT_ratio eT_mono_err eT_chro_err eT_ratio_err
bin
-2 75.471224 26.059639 8.712730 8.855116 3.580052 3.672126 0.000325 36 0.334338 0.339802 0.370647 0.375383 0.983921 0.061774 0.062564 0.008914
-1 163.657925 91.785470 48.254966 48.709079 15.971869 16.621183 0.000325 92 0.525736 0.530684 0.417149 0.425544 0.990677 0.043491 0.044366 0.003558
0 366.024952 4266.880418 1255.499974 1264.821614 298.706786 304.076804 0.000325 4381 0.294243 0.296428 0.264586 0.266954 0.992630 0.003997 0.004033 0.000922
1 664.748677 8074.710478 1134.598444 1139.743149 236.368209 240.189942 0.000325 8075 0.140513 0.141150 0.171093 0.172470 0.995486 0.001904 0.001919 0.001424
2 1099.445749 20225.835545 1590.518926 1600.398088 431.848123 437.309033 0.000325 20219 0.078638 0.079126 0.146121 0.147042 0.993827 0.001028 0.001034 0.001607
3 1657.612548 28846.089374 1271.521961 1278.947120 574.412700 574.238202 0.000325 28727 0.044080 0.044337 0.141113 0.141092 0.994194 0.000833 0.000832 0.002406
In [86]:
plotly_eT_plot(rmin=300, dlnr=0.5,min_bin_count=20,show_data=False,show_mono=True)
In [87]:
plotly_eT_plot(rmin=300, dlnr=0.5,min_bin_count=20,show_data=False,show_chro=True)

ipywidgets

which pip
pip install ipywidgets
jupyter nbextension enable --py --sys-prefix widgetsnbextension
In [88]:
import ipywidgets as widgets
from ipywidgets import HBox, VBox
from ipywidgets import interactive
In [89]:
rmin_ = widgets.IntSlider(min=0, max=500, step=50, value=300)
dlnr_  = widgets.FloatSlider(min=0.1,max=0.8,step=0.05,value=0.5)
min_bin_count_ = widgets.IntSlider(min=10, max=600, step=20, value=20)
interactive_plot = interactive(plotly_eT_plot,
                               rmin=rmin_,
                               dlnr=dlnr_,
                               min_bin_count=min_bin_count_)

output = interactive_plot.children[-1]
interactive_plot
Widget Javascript not detected.  It may not be installed or enabled properly.

Time Taken

In [90]:
time_taken = time.time() - time_start_notebook
h,m = divmod(time_taken,60*60)
print('Time taken to run whole notebook: {:.0f} hr '\
      '{:.0f} min {:.0f} secs'.format(h, *divmod(m,60)))
Time taken to run whole notebook: 0 hr 0 min 43 secs
In [91]:
import subprocess
subprocess.call(['python', '-m', 'nbconvert', '*.ipynb'])
Out[91]:
0
In [92]:
!python get_nbviewer_links.py
pwd /Users/poudel/Research/a06_shear_imcat/a01_shear_analysis_dmstack/a01_dmstack_analysis_object_distribution/a05_jan22
Username: bpRsh
Repo    : 2019_shear_analysis_after_dmstack
Folder  : a05_jan22
Notebooks:  ['a01_dmstack_csv_cleaning.ipynb', 'a02_ngals10k_20k_compare.ipynb', 'a03_dmstack_analysis_ngals10k_000_199.ipynb']
In [ ]: